setwd("C:\\Documents and Settings\\John\\My Documents\\Server\\Tree_Submission\\JELS\\Replication")

library(rpart)#for running trees
library(foreign)#for importing Stata datasets

true.malice.split <- 6
true.falsity.split <- 4
n <- 300 #number of cases
set.seed(1)#set seeds to get same values every time script is run
falsity <- runif(n, 0, 10)#create uniform variable on 0:10 interger scale
malice <- runif(n,0,10) # same
set.seed(2)
e1 <- rnorm(n,0,2)#1st error term
set.seed(3)
e2 <- rnorm(n,0,2)#2nd error term

y <- ifelse((falsity+e1)>=4 & (malice+e2)>=6, 1,0)
table(y)
#  0   1 
#222   78

#FIGURE 3:

#LOGIT RESULTS

logit.example <- glm(y~falsity + malice, family=binomial (link = "logit"))
summary(logit.example)
predicted.logit <- ifelse(logit.example$fitted.values>.5,1,0)
correct.logit <- predicted.logit==y
table(correct.logit)
#FALSE  TRUE 
#   54   246 =.82

#get proportional reduction in error
#100 x [(% correct.logitly predicted.logit - % in modal category )/(100%-% in modal category.]
modal.cat <- (length(y)-sum(y))/length(y)#get # of cases in modal category
pred.logit.cor  <- sum(correct.logit)/length(y)#percent predicted correctly
pre.logit.libel <- 100*((100*pred.logit.cor - 100*modal.cat)/(100-100*modal.cat))#pre
print(pre.logit.libel, digits = 3)#=39.8

########################################## 
########################################## 
  #TREE
########################################## 
########################################## 

#get line for 2-dimensional partition
#set y = .5, and each coef = 0 to get y-intercept and slope,
    #then take logit of both sides of equation
intercept.logit <- -logit.example$coef[1]/logit.example$coef[2]
slope.logit <-  -intercept.logit/ (-logit.example$coef[1]/logit.example$coef[3])
tree.libel<-rpart(y~falsity + malice, cp=.001, method = "class")#create tree 
post(tree.libel, file="")#plot tree in R graphics window; file command tells R not to create pdf
printcp(tree.libel, digits = 2)#print cp table
#Root node error: 83/300 = 07667
#n= 300 
#         CP nsplit rel error  xerror     xstd
#1 0469880      0   1.00000 1.00000 0.093353
#2 0.0481928      2   0.50602 0.59036 0.077143
#3 0.0040161      4   0.40964 0.57831 0.076504
#4 0.0010000      7   0.39759 0.62651 0.078993
#prune tree based on cp results to 2 splits
tree.libel <- prune(tree.libel, cp = .10)
  #PLOT TREE
post(tree.libel, file="")#plot tree in R graphics window; file command tells R not to create pdf

split.1.error <- 5.046 #1st branch split
split.2.error <- 4.801 #2nd branch split
resub.predicted <- ifelse(predict(tree.libel, type="vector") == 2, 1, 0) #create vector with resubsitution prediction 
                    #default is 2's for 1's and 1's for 0  - use ifelse to switch
correct.resub <- resub.predicted==y
table(correct.resub) 
#FALSE  TRUE = .86
#   41   259 

pred.tree.cor  <- sum(correct.resub)/length(y)#percent predicted correctly for PRE
pred.tree.cor
pre.tree.libel <- 100*((100*pred.tree.cor  - 100*modal.cat)/(100-100*modal.cat))#pre
pre.tree.libel


#Cross-Validated Prediction
    #simulate 1000 times and take mean
n.sims <- 1000
pre.xv.libel <- rep(NA, length(n.sims))
pred.correct <- rep(NA, length(n.sims))

for (i in 1:n.sims){
xv.libel <- ifelse((xpred.rpart(tree.libel, xval=10, cp = .02)) == 2, 1, 0) #cross-validated prediction; 
correct.xv.libel <- ifelse(xv.libel==y,1,0)
pred.cor.xv.libel  <- sum(correct.xv.libel)/length(y)
pred.correct[i] <- pred.cor.xv.libel
pre.xv.libel[i] <- 100*((100*pred.cor.xv.libel - 100*modal.cat)/(100-100*modal.cat))
}
print(summary(pred.correct)) #Cross-validation prediction rate
print(summary(pre.xv.libel))#Cross-validation PRE

cv.predicted.tree <- ifelse(xpred.rpart(tree.libel)[,2] == 2, 1,0) #cross-validated prediction, contained in 2nd column of matrix
correct.cv.tree <- cv.predicted.tree==y
table(correct.cv.tree)
#FALSE  TRUE = .83
#   47   253 

#################################################################
#################################################################
  #FIGURE 4
#################################################################
#################################################################

pdf("Figure_4.pdf", height = 5, width = 5)

par(mfrow = c(1,1), mar=c(4,4,1,1), pty = "s")#make plot square
plot(malice, falsity, xlab="",ylab="", type="n", axes = F, 
    xaxs = "i", yaxs = "i", xlim = c(0,10), ylim = c(0,10)) # call empty plot
axis(1, at = seq(0,10, by = 2), mgp = c(2,.5,0), cex.axis = 1.2)
axis(2, at = seq(0,10, by = 2), las = 1, mgp = c(2,.5,0) , cex.axis = 1.2)
mtext("Malice", 1, line = 2, cex = 1.5)
mtext("Falsity", 2, line = 2, cex = 1.5)
#add shading for incorrect regions of logit partition
    #draw first so it doesn't cancel out lines
#first: get intersections where logit lines hits plot boundaries and true rule
top.int <- (10-intercept.logit)/slope.logit#where line hits top of panel
right.int <- intercept.logit + 10*slope.logit#where line hits right of panel
top.rule.int <- intercept.logit + slope.logit*true.malice.split#where line crosses vertical split of rule
right.rule.int <- (true.falsity.split - intercept.logit)/slope.logit#
#shading -- first -- where logit is wrong
polygon(x=c(top.int, true.malice.split, true.malice.split),
    y = c(10, 10,top.rule.int ), #left-hand block of "not liable" region
    col= "gray90",  border=F)
polygon(x=c(true.malice.split, true.malice.split, right.rule.int),
    y = c(top.rule.int, true.falsity.split,  true.falsity.split), #left-hand block of "not liable" region
    col= "gray90",  border=F)
polygon(x=c(right.rule.int, 10,10),
    y = c( true.falsity.split, true.falsity.split, right.int), #left-hand block of "not liable" region
    col= "gray90",  border=F)

box() #draw box around plot -- this is the case-space region
segments(true.malice.split,true.falsity.split,true.malice.split,10)#draw true rule -- solid line
segments(true.malice.split,true.falsity.split,10,true.falsity.split)
segments(split.1.error,split.2.error,split.1.error,10, lwd = 1.1, lty  = 5)#vertical line marking estimates case partition
segments(split.1.error,split.2.error,10,split.2.error, lwd = 1.1, lty = 5)#horizontal line marking case partition
abline(intercept.logit,slope.logit,lty=2,  lwd = 1.1) #Plot logit partition
text(3,3,"Not Liable", cex = 1.5, , font = 2)
text(8.5,7,"Liable", cex = 1.5, font = 2)
text(2.5,9.3,"Logit partition", cex = 1.2)
text(7.9,9, "True rule", cex = 1.2)
text(2.9,6, "Tree partition", cex = 1.2)
arrows(4,9.3,4.5,9.3, length =.05) #Logit arrow
arrows(6.7,9,6.15,9, length =.05)   # Rule arrow
arrows(4.5,6,4.9,6, length =.05) #Tree arrow

dev.off()
